% Main program for bootstrap inference

clear

%% load empirical data with ordered pairs

% outcome variable: profit
data_sorted = csvread("data/data_profit.csv"); 

% standardize continuous X for IPW
X1 = (data_sorted(:,6) - mean(data_sorted(:,6))) / std(data_sorted(:,6));
X2 = (data_sorted(:,15) - mean(data_sorted(:,15))) / std(data_sorted(:,15));
X3 = (data_sorted(:,16) - mean(data_sorted(:,16))) / std(data_sorted(:,16));

X = [X1,X2,X3]; % three continuous matching characteristics

A = data_sorted(:,3); % treatment status

N = size(data_sorted,1); % number of obs
n = sum(A); % number of pairs
n_Y = 1; % number of outcome variables considered

%% setup values
taulist = [0.25;0.75;0.5]; % quantile index
L = length(taulist);

theta_true = 0; % true theta under H0

B = 5000; % number of bootstraps

%% intialization

Deltahat = zeros(n_Y,1);

std_t_naive=zeros(n_Y,1);
t_naive =zeros(n_Y,1);

std_t_adj=zeros(n_Y,1);
t_adj = zeros(n_Y,1);

std_t_adj2=zeros(n_Y,1);
t_adj2 = zeros(n_Y,1);

std_t_ipw = zeros(n_Y,1);
t_ipw      = zeros(n_Y,1);

std_t_b2pair = zeros(n_Y,1);
t_b2pair      = zeros(n_Y,1);

q1 = zeros(n_Y,L); % Y1 quantile
q0= zeros(n_Y,L); % Y0 quantile
q = zeros(n_Y,L+3); % QTEs

std_b2breve = zeros(n_Y,L+3); % naive multiplier bootstrap std
std_b2pair = zeros(n_Y,L+3); % naive multiplier bootstrap of the pairs
std_b2 = zeros(n_Y,L+3); % gradient bootstrap std
std_b2ipw = zeros(n_Y,L+3); % IPW bootstrap std

DV = [data_sorted(:,7:14),data_sorted(:,18:end-1)];

%% Predicted treatment status for IPW bootstrap 
knots1 = quantile(X(:,1),[0.3,0.5]); 
b1 = [max(repmat(X(:,1),[1,size(knots1,2)])-repmat(knots1,[N,1]),0).^1];
knots2 = quantile(X(:,2),[0.3,0.5]); 
b2 = [max(repmat(X(:,2),[1,size(knots2,2)])-repmat(knots2,[N,1]),0).^1];
knots3 = quantile(X(:,3),[0.3,0.5]); 
b3 = [max(repmat(X(:,3),[1,size(knots3,2)])-repmat(knots3,[N,1]),0).^1];
bx = [ones(N,1), b1, b2, b3, X(:,1).*X(:,2), X(:,2).*X(:,3), X(:,1).*X(:,3)];

% matching variables x12 and x13 are nearly exactly the same, hence only
% keep x13 (corresponding to the 18th column in the dataset)
bx = [bx,data_sorted(:,7:14),data_sorted(:,18:end-1)];

d       = size(bx,2);

%% ipw with cross validation
std_T_ipwCV   = zeros(n_Y,1);           % The standard error of CV IPW ATE
TipwCV        = zeros(n_Y,1);           % The value of CV IPW ATE t-statistics 
std_b2ipwCV   = zeros(n_Y,L+3);         % The standard error of CV IPW QTE

%%
for i=1:n_Y % loop over n_Y y variables
    
    rng(i);
    
    Y = data_sorted(:,5);

    %% Naive t-stat
    n1 = n;n0 = N - n1;
    Y1 = Y(A==1);Y0 = Y(A==0);
    Y1bar = sum(Y.*A)/n1;Y0bar = sum(Y.*(1-A))/n0;
    sigma21 = sum((Y - Y1bar).^2.*A)/n1;sigma20 = sum((Y - Y0bar).^2.*(1-A))/n0;
    
    Deltahat(i) = Y1bar - Y0bar; % ATE

    std_t_naive(i) = sqrt(sigma21/n1 + sigma20/n0);
    t_naive(i) = (Deltahat(i) - theta_true)/sqrt(sigma21/n1 + sigma20/n0);
    
    %% adjusted t-stat
    Ysort = Y; % Y and A has already been sorted in the data
    Asort = A;
    Ysort21 = Ysort(Asort == 1); 
    Ysort20 = Ysort(Asort == 0);
    Ysort41 = Ysort21(mod(1:n,2)==1); % Ysort21 1,3,5,7,odd number
    Ysort42 = Ysort20(mod(1:n,2)==1);
    Ysort43 = Ysort21(mod(1:n,2)==0); % Ysort21 2,4,6,8, even number
    Ysort40 = Ysort20(mod(1:n,2)==0);
    Asort41 = ones(n/2,1);
    Asort42 = zeros(n/2,1);
    Asort43 = ones(n/2,1);
    Asort40 = zeros(n/2,1);

    taunsquare = mean((Ysort21 -Ysort20).^2);
    lambdansquare = mean((Ysort41 - Ysort42).*(Ysort43 - Ysort40).*(Asort41 - Asort42).*(Asort43 - Asort40));
    taunsquare2 = sum((Ysort21 -Ysort20 - mean((Ysort21 -Ysort20))).^2); 
    lambdansquare2 = sum(((Ysort41 - Ysort43)-(Ysort42 - Ysort40)).^2);
    std_t_adj(i) = sqrt(taunsquare - 0.5*(lambdansquare + Deltahat(i)^2)) / sqrt(n);
    t_adj(i) = (Deltahat(i) - theta_true)/std_t_adj(i);
    std_t_adj2(i) = sqrt((taunsquare2 + lambdansquare2)/(2*n)) / sqrt(n);
    t_adj2(i) = (Deltahat(i) - theta_true)/std_t_adj2(i);
    
    %% Gradient Bootstrap
    Tstar = zeros(2,L);  h = zeros(2,L);
    q1star = zeros(B,L); % gradient bootstrapped q1 quantile
    q0star = zeros(B,L); % gradient bootstrapped q0 quantile
    qstar = zeros(B,L+3); % gradient bootstrapped QTE
    
    q1breve = zeros(B,L);q0breve = zeros(B,L); qbreve = zeros(B,L+3); % corresponding q1, q0 and q from naive multiplier bootstrap
    q1bpair = zeros(B,L); q0bpair = zeros(B,L); qbpair = zeros(B,L+3); % corresponding q1, q0 and q from naive multiplier bootstrap of the pairs
    q1ipw = zeros(B,L); q0ipw = zeros(B,L); qipw = zeros(B,L+3); % corresponding q1, q0 and q from IPW bootstrap
    
    % QTE estimates
    for j = 1:L
        tau = taulist(j);
        q1(i,j) = prctile(Y1,100*tau); q0(i,j) = prctile(Y0,100*tau); q(i,j) = q1(i,j)-q0(i,j);
    end
    
    q(i,L+1) = q(i,L-1) - q(i,1); % 75% quantile - 25% quantile
    q(i,L+2) = q(i,L) - q(i,1); % 50% quantile - 25% quantile
    q(i,L+3) = q(i,L-1) - q(i,L);  % 75% quantile - 50% quantile
    
    for b = 1:B
        eta = randn(n,1);etahat = randn(n/2,1); % two sequences of i.i.d standard normal random variables
        temp1 = zeros(L,1);temp0 = zeros(L,1);
        
        xi = -log(1-rand(N,1)); % multiplier for simple weight bootstrap and IPW
        
        theta   = ((repmat(xi,1,d).*bx)'*bx/n)^(-1)*((repmat(xi,1,d).*bx)'*A/n);
        Ahat    = bx*theta; 
        
        thetaipw(b) = sum(xi.*A.*Y./Ahat)/(sum(xi.*A./Ahat)) - sum(xi.*(1-A).*Y./(1-Ahat))/(sum(xi.*(1-A)./(1-Ahat))); % IPW ATE
        
        % naive pair ATE
        xi_pair = repelem(xi(n+1:N),2);
        thetab2pair(b) = sum(xi_pair.*A.*Y)/(sum(xi_pair.*A)) - sum(xi_pair.*(1-A).*Y)/(sum(xi_pair.*(1-A))); 
        
        for j = 1:L
            
            tau = taulist(j);
            qtemp1 = prctile(Y1,100*tau);qtemp0 = prctile(Y0,100*tau);
            S_1 = [eta'*(tau - (Ysort21<qtemp1));eta'*(tau - (Ysort20<qtemp0))]; 
            S_2 = [etahat'*(tau - (Ysort41<qtemp1) - (tau - (Ysort43<qtemp1)));etahat'*(tau - (Ysort42<qtemp0) - (tau - (Ysort40<qtemp0)))]; % S2 in Section 5
            Tstar = (S_1+S_2)/sqrt(2);
            h = min(max(floor(n*tau + Tstar+1),1),n);
            temp1(j) = prctile(Y1,h(1)/n*100); temp0(j)= prctile(Y0,h(2)/n*100); % q1 and q0 in gradient bootstrap

            q1breve(b,j) = myqr1(Y,A,ones(N,1)/2,tau*N,xi); q0breve(b,j) = myqr0(Y,A,ones(N,1)/2,tau*N,xi); qbreve(b,j) = q1breve(b,j)-q0breve(b,j); % q1, q0 and q in naive multiplier bootstrap
            q1ipw(b,j) = myqr1(Y,A,Ahat,tau*N,xi); q0ipw(b,j) = myqr0(Y,A,Ahat,tau*N,xi); qipw(b,j) = q1ipw(b,j)-q0ipw(b,j); % q1, q0 and q in IPW bootstrap
            
            
            q1bpair(b,j) = myqr1(Y,A,ones(N,1)/2,tau*N,xi_pair); q0bpair(b,j) = myqr0(Y,A,ones(N,1)/2,tau*N,xi_pair); qbpair(b,j) = q1bpair(b,j)-q0bpair(b,j); % q1, q0 and q in naive multiplier bootstrap of the pairs
            
        end
        
        q1star(b,:)= temp1;q0star(b,:) = temp0;
        
    end

    qstar(:,1:L) = q1star-q0star;
    qstar(:,L+1) = qstar(:,L-1) - qstar(:,1);
    qstar(:,L+2) = qstar(:,L) - qstar(:,1);
    qstar(:,L+3) = qstar(:,L-1) - qstar(:,L);
    std_b2(i,:) = ((prctile(qstar,97.5) - prctile(qstar,2.5)))/(norminv(0.975) - norminv(0.025));
    
    qbreve(:,L+1) = qbreve(:,L-1) - qbreve(:,1);
    qbreve(:,L+2) = qbreve(:,L) - qbreve(:,1);
    qbreve(:,L+3) = qbreve(:,L-1) - qbreve(:,L);
    
    qbpair(:,L+1)    = qbpair(:,L-1) - qbpair(:,1);
    qbpair(:,L+2) = qbpair(:,L) - qbpair(:,1);
    qbpair(:,L+3) = qbpair(:,L-1) - qbpair(:,L);
    
    qipw(:,L+1)    = qipw(:,L-1) - qipw(:,1);
    qipw(:,L+2) = qipw(:,L) - qipw(:,1);
    qipw(:,L+3) = qipw(:,L-1) - qipw(:,L);
    
    std_b2breve(i,:) = ((prctile(qbreve,97.5) - prctile(qbreve,2.5)))/(norminv(0.975) - norminv(0.025));
    std_b2ipw(i,:)     = ((prctile(qipw,97.5) - prctile(qipw,2.5)))/(norminv(0.975) - norminv(0.025));
    std_b2pair(i,:)     = ((prctile(qbpair,97.5) - prctile(qbpair,2.5)))/(norminv(0.975) - norminv(0.025));
    
    std_t_ipw(i) = ((prctile(thetaipw,97.5) - prctile(thetaipw,2.5)))/(norminv(0.975) - norminv(0.025));
    std_t_b2pair(i) = ((prctile(thetab2pair,97.5) - prctile(thetab2pair,2.5)))/(norminv(0.975) - norminv(0.025));
    
    % t statistic for IPW ATE    
    t_ipw(i)      = (Deltahat(i) - theta_true)/std_t_ipw(i);
    
    % t statistics for naive multiplier bootstrap of pairs ATE
    t_b2pair(i) = (Deltahat(i) - theta_true)/std_t_b2pair(i);
    
    %% IPW with cross validation
    X_CV = [X,DV];
    
    [TipwCV(i,:),std_T_ipwCV(i,:),~,std_b2ipwCV(i,:),~,~,~,~,~,~,~,~] = CVSieveBasisIPWType2(Y,X_CV,A,n,taulist,theta_true,B);
end

%% test statistics

stat_qstar = (q - theta_true) ./ std_b2;
stat_qbreve = (q - theta_true) ./ std_b2breve;
stat_ipw = (q - theta_true) ./ std_b2ipw;
stat_qbpair = (q - theta_true) ./ std_b2pair;
stat_ipw_cv = (q - theta_true) ./ std_b2ipwCV;


